# accuracy plots
{
  # obtain data: coerce the master results object to a data frame and keep
  # it under both names used by the plotting sections
  R_final_ <- as.data.frame(RX_final_master)
  FINALMAT <- R_final_

  for(figType_ in c("","_appendix")){
  # Choose which methods (indices into save_names_vec) are drawn for this
  # figure type: the main figures leave out every method whose save name
  # contains "markov"; the appendix figures include all methods.
  if (figType_ == "") {
    iterateOver_ <- which(!grepl(save_names_vec, pattern = "markov"))
  }
  if (figType_ == "_appendix") {
    # seq_along() stays empty for an empty vector, where 1:length() gave c(1, 0)
    iterateOver_ <- seq_along(save_names_vec)
  }
    
  # setup plotting parameters
  addLetters <- FALSE      # when TRUE, curves are also tagged with text labels
  lettersCex <- 1.25       # text size of those labels
  LEGEND_YSCALING <- 2.15
  TOP_SPACE <- 4           # top margin handed to par(mar = ...)
  cex_axis <- 1.2          # axis annotation size
  thres_ <- -1             # plotting threshold
  # Number of log-scale bins used by divideFxn(); with the value 1 the
  # binning branch is skipped and the element nearest the median is used.
  nCuts <- 1

  # Pick the position(s) in `x` at which a curve gets its text label.
  #
  # x: numeric vector; zeros are treated as missing.
  # Returns: integer index/indices into `x`. With nCuts <= 1 this is the
  #   position of the value closest to the median of the non-missing
  #   entries; with nCuts > 1 it is the first position falling into each of
  #   the nCuts bins of log(x + 1). If nothing can be selected, a random
  #   position is returned (integer(0) for empty input).
  divideFxn <- function(x) {
    x[x == 0] <- NA
    # na.rm = TRUE: the zeros were just turned into NA, and a median of NA
    # would leave which.min() with nothing to choose from.
    present_indices <- try(
      which.min(abs(x - median(x, na.rm = TRUE))),
      silent = TRUE
    )
    if (nCuts > 1) {
      cuts_ <- as.numeric(cut(log(x + 1), nCuts))
      present_indices <- which(!duplicated(cuts_))
      present_indices <- present_indices[!is.na(cuts_[present_indices])]
    }
    if (inherits(present_indices, "try-error") || length(present_indices) == 0) {
      # Fall back to an arbitrary position; an empty input stays empty.
      present_indices <- if (length(x) > 0) sample(seq_along(x), 1) else integer(0)
    }
    return(present_indices)
  }

  # ROC curve: for every selected method, draw false positive rate (x)
  # against true positive rate (y) as one line and save the figure as a PDF.
  pdf(sprintf('./Figures/Example%s_reduceData%s_roc%s.pdf',exampleNum_i,reduceData,figType_))
  {
    # see https://en.wikipedia.org/wiki/False_positive_rate
    take_indices <- seq(1, length(FINALMAT$maxDist), length.out = 6)

    # Upper y limit: largest true positive rate over all methods plus a small
    # margin. drop = FALSE keeps a data frame even when only one column
    # matches, which apply() needs.
    y_max_axis_ <- max(0.01 + apply(
      FINALMAT[, grep(colnames(FINALMAT), pattern = "TruePositives"), drop = FALSE] /
        nrow(z_red_human),
      2, function(zer) { max(zer, na.rm = TRUE) }
    ))
    x_max_axis_ <- 0.01
    min_axis_ep <- 0.0001  # lower axis bound; also added to every plotted point
    ylim_ <- c(min_axis_ep, y_max_axis_)
    xlim_ <- c(min_axis_ep, x_max_axis_)

    # Empty canvas (zero-size white point); axes are drawn separately below.
    par(mar = c(5, 8, TOP_SPACE, 1))
    plot(0.01,
         xlim = xlim_,
         ylim = ylim_,
         log = '', cex = 0, col = "white",
         cex.lab = 2, xlab = "False Positive Rate",
         lwd = 3, xaxt = "n", yaxt = "n",
         cex.axis = cex_axis,
         ylab = "True Positive Rate\n")
    abline(a = 0, b = 1, lty = 10, col = "gray")  # identity line for reference
    sfsmisc::eaxis(1, cex.axis = cex_axis)  # x-axis
    sfsmisc::eaxis(2, cex.axis = cex_axis)  # y-axis

    for (i_ in iterateOver_) {
      letter_i <- save_names_letters[i_]
      save_name_i_sub <- save_name_sub[i_]
      plot_col_ <- plot_col_vec[i_]
      plot_lty_ <- plot_lty_vec[i_]

      # Per-method columns are looked up by name. exact = FALSE keeps the
      # partial-name matching that `$` performs on data frames.
      false_pos_ <- R_final_[[sprintf("%s_FalsePositive", save_name_i_sub), exact = FALSE]]
      true_neg_ <- R_final_[[sprintf("%s_TrueNegative", save_name_i_sub), exact = FALSE]]
      true_pos_ <- R_final_[[sprintf("%s_TruePositives", save_name_i_sub), exact = FALSE]]

      denom_ <- false_pos_ + true_neg_
      xmark_ <- false_pos_ / denom_
      ymark_ <- true_pos_ / nrow(z_red_human)
      points(xmark_ + min_axis_ep, ymark_ + min_axis_ep, lwd = 3,
             type = "l", lty = plot_lty_, col = plot_col_,
             pch = letter_i)

      # Position at which the optional text label is placed.
      take_indices_ <- try(
        divideFxn(FINALMAT[[sprintf("%s_MatchedDatasetSize", save_name_i_sub), exact = FALSE]]),
        silent = TRUE
      )
      if (addLetters) {
        text(xmark_[take_indices_] + min_axis_ep, ymark_[take_indices_] + min_axis_ep,
             labels = letter_i,
             lty = plot_lty_, col = plot_col_,
             cex = lettersCex)
      }
    }
  }
  dev.off()

  # save legend: a separate PDF holding only the legend for the methods
  # drawn in this figure type
  pdf(
    sprintf('./Figures/Example%s_reduceData%s_legend%s.pdf',
            exampleNum_i, reduceData, figType_),
    height = 6, width = 3.25
  )
  {
    par(xpd = TRUE)
    plot.new()  # blank page to host the legend
    legend(
      "top",
      legend = pretty_save_names_vec[iterateOver_],
      col = plot_col_vec[iterateOver_],
      lty = plot_lty_vec[iterateOver_],
      lwd = 3,
      cex = 2,
      bty = 'n'
    )
  }
  dev.off()

  # true positive rate: one line per method, true positive rate (y) against
  # the size of the matched dataset (x, log scale), saved as a PDF
  pdf(sprintf('./Figures/Example%s_reduceData%s_truePositives%s.pdf',exampleNum_i,reduceData,figType_))
  {
    FINALMAT <- R_final_
    par(mar = c(5, 5, TOP_SPACE, 1))
    eps_ <- 1  # added to every x value; the x axis is log-scaled
    dist_coarse <- round(10^(seq(-4, 0, length.out = 8)), 3L)

    # Axis limits: min/max matched size (+1) on x, 0 to the best rate on y.
    xlim_ <- 1 + summary(unlist(
      FINALMAT[, grepl(colnames(FINALMAT), pattern = "MatchedDatasetSize")]
    ))[c(1, 6)]
    ylim__ <- c(0, max(FINALMAT[, grepl(colnames(FINALMAT), pattern = "TruePos")], na.rm = TRUE) /
                  nrow(z_red_human))

    # Empty canvas; the x axis is drawn separately with powers of ten.
    plot(0, type = 'l', cex = 2, lwd = 1.5, col = 'white',
         cex.lab = 2, xlab = 'Size of Matched Dataset',
         ylab = 'True Positive Rate',
         ylim = ylim__,
         xaxt = 'n',
         xlim = xlim_,
         cex.axis = cex_axis,
         log = "x", pch = '', lty = 2)
    sfsmisc::eaxis(1, at = 10^(0:100), cex.axis = cex_axis)

    for (i_ in iterateOver_) {
      letter_i <- save_names_letters[i_]
      save_name_i_sub <- save_name_sub[i_]
      plot_lty_i <- plot_lty_vec[i_]
      plot_col_i <- plot_col_vec[i_]

      # Per-method columns are looked up by name. exact = FALSE keeps the
      # partial-name matching that `$` performs on data frames.
      matched_data_size_ <- FINALMAT[[sprintf("%s_MatchedDatasetSize", save_name_i_sub), exact = FALSE]]
      TruePos_ <- FINALMAT[[sprintf("%s_TruePositives", save_name_i_sub), exact = FALSE]] /
        nrow(z_red_human)

      n_distinct_sizes_ <- length(unique(matched_data_size_))
      if (n_distinct_sizes_ == 1) {
        # Matched size never varies: spread the values evenly over the x range.
        newx_ <- seq(xlim_[1], xlim_[2], length.out = length(TruePos_)) + eps_
        points(newx_, TruePos_, pch = ' ', cex = 2, type = 'l',
               col = plot_col_i,
               lty = plot_lty_i, lwd = 4)
        if (addLetters) {
          take_indices_ <- try(divideFxn(TruePos_), silent = TRUE)
          text(newx_[take_indices_],
               TruePos_[take_indices_], labels = letter_i,
               cex = lettersCex, col = plot_col_i)
        }
      } else if (n_distinct_sizes_ > 1) {
        # Matched size varies: plot against it, leaving out zero-size entries.
        # (Previously `length(unique(x) > 1)` made this branch run in the
        # constant-size case as well.)
        whichNon0 <- which(matched_data_size_ > 0)
        take_indices_ <- try(divideFxn(matched_data_size_[whichNon0]), silent = TRUE)
        points((matched_data_size_ + eps_)[whichNon0],
               TruePos_[whichNon0], pch = ' ', cex = 2, type = 'l', col = plot_col_i,
               lty = plot_lty_i, lwd = 4)
        if (addLetters) {
          text(matched_data_size_[whichNon0][take_indices_] + eps_,
               TruePos_[whichNon0][take_indices_],
               labels = letter_i, cex = lettersCex, col = plot_col_i)
        }
      }
    }
  }
  dev.off()

  # F score figures: one line per method, F2 score (y) against the size of
  # the matched dataset (x, log scale), saved as a PDF.
  # F_beta here is (1 + beta^2) TP / (0.001 + (1 + beta^2) TP + beta^2 FN + FP);
  # the 0.001 keeps the denominator away from zero.
  pdf(sprintf('./Figures/Example%s_reduceData%s_Fscore%s.pdf',exampleNum_i,reduceData,figType_))
  {
    par(mar = c(5, 5, TOP_SPACE, 1))
    FINALMAT <- R_final_
    eps_ <- 1  # added to every x value; the x axis is log-scaled
    dist_coarse <- round(10^(seq(-4, 0, length.out = 8)), 3L)
    beta_ <- 2

    # y range: 0 up to the best F score across all methods' columns.
    my_ylim <- c(0, max(
      (1 + beta_^2) * FINALMAT[, grep(colnames(FINALMAT), pattern = "TruePositives")] /
        (0.001 + (1 + beta_^2) * FINALMAT[, grep(colnames(FINALMAT), pattern = "TruePositives")] +
           beta_^2 * FINALMAT[, grep(colnames(FINALMAT), pattern = "FalseNegative")] +
           1 * FINALMAT[, grep(colnames(FINALMAT), pattern = "FalsePositive")]),
      na.rm = TRUE
    ))
    log_x_ <- "x"
    xlim_ <- 1 + summary(unlist(
      FINALMAT[, grepl(colnames(FINALMAT), pattern = "MatchedDatasetSize")]
    ))[c(1, 6)]
    if (exampleNum_i == 5) {
      dist_coarse <- seq(from = xlim_[1], to = xlim_[2], length.out = 5)
    }

    # Empty canvas; the x axis is drawn separately with powers of ten.
    lwd_ <- 3
    plot(0, type = 'l', col = "white", xlim = xlim_,
         cex.lab = 2, xlab = 'Size of Matched Dataset',
         ylab = expression(F[2]~Score),
         ylim = my_ylim, lwd = lwd_, xaxt = 'n',
         log = log_x_, cex.axis = cex_axis,
         pch = '', cex = 2)
    sfsmisc::eaxis(1, at = 10^(0:100), cex.axis = cex_axis)

    for (i_ in iterateOver_) {
      letter_i <- save_names_letters[i_]
      save_name_i_sub <- save_name_sub[i_]
      plot_col_i <- plot_col_vec[i_]
      plot_lty_i <- plot_lty_vec[i_]

      # Per-method columns are looked up by name. exact = FALSE keeps the
      # partial-name matching that `$` performs on data frames.
      match_size_ <- FINALMAT[[sprintf("%s_MatchedDatasetSize", save_name_i_sub), exact = FALSE]]
      true_pos_i_ <- FINALMAT[[sprintf("%s_TruePositives", save_name_i_sub), exact = FALSE]]
      false_neg_i_ <- FINALMAT[[sprintf("%s_FalseNegative", save_name_i_sub), exact = FALSE]]
      false_pos_i_ <- FINALMAT[[sprintf("%s_FalsePositive", save_name_i_sub), exact = FALSE]]
      f2_ <- (1 + beta_^2) * true_pos_i_ /
        (0.001 + (1 + beta_^2) * true_pos_i_ + beta_^2 * false_neg_i_ + false_pos_i_)

      n_distinct_sizes_ <- length(unique(match_size_))
      if (n_distinct_sizes_ == 1) {
        # Matched size never varies: spread the values evenly over the x range.
        take_indices_ <- try(divideFxn(match_size_), silent = TRUE)
        # The grid is sized from f2_ itself rather than from a vector left
        # over from the true-positive figure.
        newx_ <- seq(xlim_[1], xlim_[2], length.out = length(f2_)) + eps_
        points(newx_, f2_, pch = ' ', cex = 2, type = 'l',
               col = plot_col_i, lty = plot_lty_i, lwd = 4)
        if (addLetters) {
          text(newx_[take_indices_], f2_[take_indices_],
               labels = letter_i, cex = lettersCex, col = plot_col_i)
        }
      } else if (n_distinct_sizes_ > 1) {
        # Matched size varies: plot against it, leaving out zero-size entries.
        # (Previously `length(unique(x) > 1)` made this branch run in the
        # constant-size case as well.)
        whichNon0 <- which(match_size_ > 0)
        take_indices_ <- try(divideFxn(match_size_[whichNon0]), silent = TRUE)
        points((match_size_ + eps_)[whichNon0],
               f2_[whichNon0], pch = ' ', cex = 2, type = 'l',
               col = plot_col_i, lty = plot_lty_i, lwd = 4)
        if (addLetters) {
          text(match_size_[whichNon0][take_indices_] + eps_,
               f2_[whichNon0][take_indices_], labels = letter_i,
               cex = lettersCex, col = plot_col_i)
        }
      }
    }
  }
  dev.off()

  # speed analysis
  # Pad the decimal part of each value with trailing zeros so that every
  # entry shows at least `roundAt` decimals (1.5 -> "1.50", 3 -> "3.00").
  #
  # zr:      vector of values; each is converted with as.character().
  # roundAt: number of decimal places to pad to (default 2).
  # Returns: character vector, one string per element of `zr`. Entries with
  #   no decimal point get "." plus `roundAt` zeros appended (so NA becomes
  #   "NA.00"); entries that already carry `roundAt` or more decimals are
  #   returned unchanged instead of failing on a negative repeat count.
  fixZeroEndings <- function(zr, roundAt = 2) {
    zero_pad <- function(n_zeros) {
      paste(rep("0", times = max(n_zeros, 0)), collapse = "")
    }
    unlist(lapply(strsplit(as.character(zr), split = "\\."), function(l_) {
      if (length(l_) == 1) {
        # no decimal part at all
        retl <- paste(l_, zero_pad(roundAt), sep = ".")
      }
      if (length(l_) == 2) {
        # integer part + decimal part: top the decimals up to `roundAt` digits
        retl <- paste(l_[1],
                      paste(l_[2], zero_pad(roundAt - nchar(l_[2])), sep = ""),
                      sep = ".")
      }
      return(retl)
    }))
  }
  # Two-column table (algorithm name, run time) exported as a LaTeX table.
  ExecutionTimeMat <- cbind(
    c(pretty_save_names_vec),
    c(execution_time)
  )
  # Round the run times to two decimals and pad with trailing zeros.
  ExecutionTimeMat[, 2] <- fixZeroEndings(round(f2n(ExecutionTimeMat[, 2]), 2L))
  colnames(ExecutionTimeMat) <- c("Algorithm", "Run Time (mins)")

  # Table caption per example number; tab_label is left untouched for any
  # other example number.
  tab_labels_ <- c(
    "2" = "Run time on the meetings data analysis.",
    "4" = "Run time on the company lobbying data analysis.",
    "5" = "Run time on the cross-language merge task.",
    "6" = "Run time on the personal name match task.",
    "7" = "Run time on the Y Combinator task."
  )
  tab_key_ <- as.character(exampleNum_i)
  if (tab_key_ %in% names(tab_labels_)) {
    tab_label <- tab_labels_[[tab_key_]]
  }

  # Missing run times were rendered as "NA.00" above; show them as "<1.00".
  ExecutionTimeMat[, 2] <- gsub(ExecutionTimeMat[, 2],
                                pattern = "NA\\.00", replacement = "<1.00")
  stargazer_text <- capture.output(
    stargazer::stargazer(
      ExecutionTimeMat,
      font.size = NULL,
      label = sprintf("tab:ExTime%s", exampleNum_i),
      title = tab_label
    )
  )
  write(stargazer_text, file = sprintf("./Figures/ExecutionTime%s.tex", exampleNum_i))

  
  # coefficient analysis (example 4 only): estimated regression coefficient
  # against the size of the matched dataset, with horizontal reference lines
  # at COEF_TRUE and its lower/upper bounds. Two PDFs are written: one with
  # every selected method, one with the "ML" method and +/- 1.96 SE bands.
  # Both figures use eps_ as it was set by the preceding figure sections.
  if (exampleNum_i == 4) {
    pdf(sprintf('./Figures/Example%s_reduceData%s_Application%s.pdf',exampleNum_i,reduceData,figType_))
    {
      par(mar = c(5, 5, TOP_SPACE, 1))
      xlim_ <- 1 + summary(unlist(
        FINALMAT[, grepl(colnames(FINALMAT), pattern = "MatchedDatasetSize")]
      ))[c(1, 6)]
      # y range spans the reference coefficient and all estimates; the upper
      # end is scaled by 1.15.
      ylim__ <- summary(c(
        COEF_TRUE,
        unlist(FINALMAT[, grep(colnames(FINALMAT), pattern = "RegCoef")])
      ))[c(1, 6)] * c(1, 1.15)
      plot(0, type = 'l', cex = 2, lwd = 1.5, col = 'white',
           cex.lab = 2, xlab = 'Size of Matched Dataset',
           ylab = 'Estimated Coefficient',
           ylim = ylim__,
           xaxt = 'n',
           xlim = xlim_,
           cex.axis = cex_axis,
           log = "x", pch = '', lty = 2)
      sfsmisc::eaxis(1, at = 10^(0:100), cex.axis = cex_axis)
      points(1000, 0, pch = "|", col = "darkgray", cex = 3)

      for (i_ in iterateOver_) {
        letter_i <- save_names_letters[i_]
        save_name_i_sub <- save_name_sub[i_]
        plot_col_i <- plot_col_vec[i_]
        plot_lty_i <- plot_lty_vec[i_]

        # Per-method columns are looked up by name. exact = FALSE keeps the
        # partial-name matching that `$` performs on data frames.
        matched_data_size_ <- FINALMAT[[sprintf("%s_MatchedDatasetSize", save_name_i_sub), exact = FALSE]]
        whichNon0 <- which(matched_data_size_ > 0)
        take_indices_ <- try(divideFxn(matched_data_size_[whichNon0]), silent = TRUE)
        RegCoef_ <- FINALMAT[[sprintf("%s_RegCoef", save_name_i_sub), exact = FALSE]]

        points(matched_data_size_[whichNon0] + eps_, RegCoef_[whichNon0],
               pch = ' ', cex = 2, type = 'l',
               col = plot_col_i, lty = plot_lty_i, lwd = 4)
        if (addLetters) {
          text(matched_data_size_[whichNon0][take_indices_] + eps_,
               RegCoef_[whichNon0][take_indices_], labels = letter_i, cex = lettersCex,
               col = plot_col_i, lty = plot_lty_i)
        }
      }

      # Reference lines: bounds (dashed, light) around the coefficient (solid).
      abline(h = COEF_TRUE_LB, lwd = 3, lty = 2, col = "lightgray")
      abline(h = COEF_TRUE, lwd = 3, lty = 1, col = "darkgray")
      abline(h = COEF_TRUE_UB, lwd = 3, lty = 2, col = "lightgray")
      text(1000, COEF_TRUE, labels = "Coefficient Using Correct Matches \n (From Human Coding)", cex = 1.25, col = "darkgray")
    }
    dev.off()

    pdf(sprintf('./Figures/Example%s_reduceData%s_Application2%s.pdf',exampleNum_i,reduceData, figType_))
    {
      par(mar = c(5, 5, 5, 1))
      xlim_ <- 1 + summary(unlist(
        FINALMAT[, grepl(colnames(FINALMAT), pattern = "MatchedDatasetSize")]
      ))[c(1, 6)]
      ylim__ <- summary(c(
        COEF_TRUE,
        unlist(FINALMAT[, grep(colnames(FINALMAT), pattern = "RegCoef")])
      ))[c(1, 6)] * c(1, 1.15)
      plot(0, type = 'l', cex = 2, lwd = 1.5, col = 'white',
           cex.lab = 2, xlab = 'Size of Matched Dataset',
           ylab = 'Estimated Coefficient',
           ylim = ylim__,
           xaxt = 'n',
           xlim = xlim_,
           cex.axis = cex_axis,
           log = "x", pch = '', lty = 2)
      sfsmisc::eaxis(1, at = 10^(0:100), cex.axis = cex_axis)
      points(1000, 0, pch = "|", col = "darkgray", cex = 3)

      # Only the method(s) whose letter code is "ML" are drawn here.
      for (i_ in which(save_names_letters == "ML")) {
        letter_i <- save_names_letters[i_]
        save_name_i_sub <- save_name_sub[i_]
        plot_col_i <- plot_col_vec[i_]
        plot_lty_i <- plot_lty_vec[i_]

        matched_data_size_ <- FINALMAT[[sprintf("%s_MatchedDatasetSize", save_name_i_sub), exact = FALSE]]
        whichNon0 <- which(matched_data_size_ > 0)
        # Computed as in the other loops; no label is drawn in this figure.
        take_indices_ <- try(divideFxn(matched_data_size_[whichNon0]), silent = TRUE)
        RegCoef_ <- FINALMAT[[sprintf("%s_RegCoef", save_name_i_sub), exact = FALSE]]
        RegCoefSE_ <- FINALMAT[[sprintf("%s_RegSE", save_name_i_sub), exact = FALSE]]

        x_vals_ <- matched_data_size_[whichNon0] + eps_
        # point estimate (thick) with thin lines at +/- 1.96 standard errors
        points(x_vals_, RegCoef_[whichNon0],
               pch = ' ', cex = 2, type = 'l', col = plot_col_i, lty = plot_lty_i, lwd = 4)
        points(x_vals_, RegCoef_[whichNon0] - 1.96 * RegCoefSE_[whichNon0],
               pch = ' ', cex = 2, type = 'l', col = plot_col_i, lty = 1, lwd = 1)
        points(x_vals_, RegCoef_[whichNon0] + 1.96 * RegCoefSE_[whichNon0],
               pch = ' ', cex = 2, type = 'l', col = plot_col_i, lty = 1, lwd = 1)
      }

      abline(h = COEF_TRUE_LB, lwd = 3, lty = 2, col = "lightgray")
      abline(h = COEF_TRUE, lwd = 3, lty = 1, col = "darkgray")
      abline(h = COEF_TRUE_UB, lwd = 3, lty = 2, col = "lightgray")
      text(1000, COEF_TRUE, labels = "Coefficient Using Correct Matches \n (From Human Coding)", cex = 1.25, col = "darkgray")
    }
    dev.off()
  }
  }
}
